Aggregated and atomic
scores per method
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#>
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#>
#> last_plot
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following object is masked from 'package:graphics':
#>
#> layout
# datasets = read_yaml("datasets.yml")
# print(score_file)
# datasets = read_yaml(file_dataset)

# Locate the late-integration score files. The search location depends on
# which workflow engine launched this script; the engine is recognised from
# the name of the current working directory.
list_wd <- strsplit(getwd(), "/")[[1]]
# identical() on the last path component stays a scalar TRUE/FALSE even if
# the split result is empty, so the `if` condition can never be zero-length.
if (identical(tail(list_wd, 1), "hadaca3_framework")) {
  # Snakemake script: the current working dir is hadaca3_framework
  score_files <- list(list.files(path = "./output/scores/", full.names = TRUE))
} else {
  # nextflow script: the score files are looked up in the working directory.
  # list.files() interprets `pattern` as a regular expression, not a glob:
  # the raw string 'score-li*' would match any name containing "score-l".
  # glob2rx() converts the intended wildcard into an anchored regex.
  score_files <- list(list.files(pattern = glob2rx("score-li*")))
}
# Empty, fully typed template for the late-integration results: 15 character
# columns describing the dataset and the pipeline configuration, followed by
# 25 numeric score columns. It has zero rows; rows are appended later.
results_li <- data.frame(
  c(
    # Character columns: dataset, reference and the function chosen at each
    # pipeline step.
    lapply(
      setNames(nm = c(
        "dataset", "ref",
        "preprocessing_mixRNA", "feature_selection_mixRNA",
        "preprocessing_RNA", "feature_selection_RNA",
        "preprocessing_scRNA", "feature_selection_scRNA",
        "deconvolution_rna",
        "preprocessing_mixMET", "feature_selection_mixMET",
        "preprocessing_MET", "feature_selection_MET",
        "deconvolution_met",
        "late_integration"
      )),
      function(column) character()
    ),
    # Numeric columns: each score, its `_norm` counterpart, and the
    # aggregated score.
    lapply(
      setNames(nm = c(
        "aid", "aid_norm",
        "aitchison", "aitchison_norm",
        "jsd", "jsd_norm",
        "mae", "mae_norm",
        "pearson_col", "pearson_col_norm",
        "pearson_row", "pearson_row_norm",
        "pearson_tot", "pearson_tot_norm",
        "rmse", "rmse_norm",
        "score_aggreg",
        "sdid", "sdid_norm",
        "spearman_col", "spearman_col_norm",
        "spearman_row", "spearman_row_norm",
        "spearman_tot", "spearman_tot_norm"
      )),
      function(column) numeric()
    )
  )
)
# Read every score file and append one result row per file to `results_li`.
#
# The pipeline configuration is encoded in the file name, in this order:
#   dt ref | mixRNA: pp fs | RNA: pp fs | scRNA: pp fs de |
#   mixMET: pp fs | MET: pp fs de | li
# The 15 capture groups below are mapped, in order, onto these column names.
component_names <- c(
  "dataset", "ref",
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA",
  "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET",
  "deconvolution_met",
  "late_integration"
)
# The dot of the ".h5" extension is escaped so it only matches a literal dot.
score_name_pattern <- "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5"

# Rows are collected in a preallocated list and bound once after the loop;
# calling rbind() on a growing data frame inside the loop copies it each time.
score_rows <- vector("list", length(score_files[[1]]))
i <- 0
for (score_file in score_files[[1]]) {
  i <- i + 1
  # Extract the base name of the file
  base_name <- basename(score_file)
  # Extract components from the file name (element 1 is the full match)
  components <- str_match(base_name, score_name_pattern)[2:16]
  # str_match() yields NA for every group when the name does not match;
  # such a file would otherwise contribute a row with all-NA metadata.
  if (anyNA(components)) {
    warning("Skipping score file with unexpected name: ", base_name,
            call. = FALSE)
    next
  }
  scores <- read_hdf5(score_file)
  # Metadata parsed from the file name, followed by the scores it contains
  score_rows[[i]] <- cbind(
    as.data.frame(
      as.list(setNames(components, component_names)),
      stringsAsFactors = FALSE
    ),
    scores
  )
}
# Bind onto the existing `results_li` so an empty file list leaves the typed
# template untouched; skipped files left NULL slots, which are dropped first.
results_li <- do.call(
  rbind,
  c(list(results_li), Filter(Negate(is.null), score_rows))
)
rownames(results_li) <- NULL
# Median aggregated score of each late-integration method, best method first.
# (A `filter(dc==2)` step used to precede the grouping; it is disabled.)
results_li |>
  group_by(late_integration) |>
  summarise(GlobalScore = median(score_aggreg)) |>
  arrange(desc(GlobalScore))
#> # A tibble: 3 × 2
#> late_integration GlobalScore
#> <chr> <dbl>
#> 1 OnlyMet 0.663
#> 2 limeanRMSE 0.660
#> 3 OnlyRna 0.646
# The five pipelines with the highest aggregated score, keeping only the
# configuration columns (dataset through late_integration) and that score.
results_li_top5 <- results_li |>
  arrange(desc(score_aggreg)) |>
  select(dataset:late_integration, score_aggreg) |>
  slice_head(n = 5)
results_li_top5
#> dataset ref preprocessing_mixRNA feature_selection_mixRNA preprocessing_RNA
#> 1 invivo1 ref LogNorm Toastbulknbfs LogNorm
#> 2 invivo1 ref LogNorm Toastbulknbfs LogNorm
#> 3 invivo1 ref LogNorm Toastbulknbfs LogNorm
#> 4 invivo1 ref LogNorm Toastbulknbfs LogNorm
#> 5 invivo1 ref ppID Toastbulknbfs ppID
#> feature_selection_RNA preprocessing_scRNA feature_selection_scRNA
#> 1 Toastbulknbfs LogNorm fsID
#> 2 Toastbulknbfs LogNorm Toastbulknbfs
#> 3 Toastbulknbfs ppID fsID
#> 4 Toastbulknbfs ppID Toastbulknbfs
#> 5 Toastbulknbfs LogNorm fsID
#> deconvolution_rna preprocessing_mixMET feature_selection_mixMET
#> 1 nnls nopp nofs
#> 2 nnls nopp nofs
#> 3 nnls nopp nofs
#> 4 nnls nopp nofs
#> 5 nnls nopp nofs
#> preprocessing_MET feature_selection_MET deconvolution_met late_integration
#> 1 nopp nofs node OnlyRna
#> 2 nopp nofs node OnlyRna
#> 3 nopp nofs node OnlyRna
#> 4 nopp nofs node OnlyRna
#> 5 nopp nofs node OnlyRna
#> score_aggreg
#> 1 0.8699194
#> 2 0.8699194
#> 3 0.8699194
#> 4 0.8699194
#> 5 0.8550689
# prediction_file = sapply(1:nrow(results_li_top5), \(i){paste0("pred-li-",paste(results_li_top5[i,1:15],collapse = "_") ,".h5")})
# pred = lapply(prediction_file, \(path){read_hdf5(path)})
# pred
# Convert the descriptive columns of `results_li` to factors.

# Data columns: levels follow the order of first appearance in `results_li`
# (unique() does not sort).
all_data_used <- c("dataset", "ref")
for (data_used in all_data_used) {
  results_li[[data_used]] <- factor(
    results_li[[data_used]],
    levels = unique(results_li[[data_used]])
  )
}

all_functions_li <- c(
  "preprocessing_mixRNA", "feature_selection_mixRNA",
  "preprocessing_RNA", "feature_selection_RNA",
  "preprocessing_scRNA", "feature_selection_scRNA",
  "deconvolution_rna",
  "preprocessing_mixMET", "feature_selection_mixMET",
  "preprocessing_MET", "feature_selection_MET",
  "deconvolution_met",
  "late_integration"
)

# Function columns: levels are sorted by decreasing aggregated score on the
# in vitro dataset. The ranking is expressed as row positions of the full
# table, so it can be used to index the full columns directly; ordering only
# the subset of scores would give positions relative to that subset.
reference_rows <- which(results_li$dataset == "invitro1")
if (length(reference_rows) == 0L) {
  # No in vitro results: rank on all rows rather than producing an empty
  # level set, which would turn every value into NA.
  reference_rows <- seq_len(nrow(results_li))
}
ranked_rows <- reference_rows[
  order(results_li$score_aggreg[reference_rows], decreasing = TRUE)
]
for (fun in all_functions_li) {
  fun_values <- as.character(results_li[[fun]])
  # Values never seen in the reference rows are appended after the ranked
  # ones so that no observation is silently converted to NA.
  results_li[[fun]] <- factor(
    fun_values,
    levels = unique(c(fun_values[ranked_rows], fun_values))
  )
}
# Interactive table of the pipeline-step columns plus the aggregated score.
index_aggreg <- which(names(results_li) == "score_aggreg")
# The step columns are selected by position: one per entry of
# `all_functions_li`, shifted by 2 to skip the first two columns.
shown_columns <- c(seq_along(all_functions_li) + 2, index_aggreg)
# Column-visibility button; the selector lets every column except the first
# be toggled.
colvis_button <- list(
  extend = "colvis",
  text = "Show/Hide Columns",
  columns = ":not(:first-child)"
)
datatable(
  results_li[, shown_columns],
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    # The "B" in `dom` places the Buttons extension in the layout
    dom = "Bfrtip",
    buttons = list(colvis_button)
  )
)